#######		REPLICATION FILES
#######		Conflict versus Disaster-induced Displacement: Similar or Distinct Implications for Security?
#######		Civil Wars 
#######		Heidrun Bohnet, Fabien Cottier and Simon Hug
#######		Replication: Results of the empirical analyses shown in the main text
#######		This version: September 2021


#### setup

# set working directory and load required packages

rm(list = ls())
setwd("path/to/Working/directory")
library("foreign")
library("lmtest")
library("sandwich")
library("survival")
library("stargazer")
library("plotrix")
library("arm")
library("plyr")

# function to extract quantities of interest
source("Rscripts/qi_linear.R")
source("Rscripts/qi_interaction.R")


# load data
cvdm_data <- read.csv("cvdm_data.csv")
names(cvdm_data)
idmc_df <- read.csv("IDMC_displacement.csv")
names(idmc_df)

# create dataset for full (Model M1: 1991-2011) and restricted sample (Models M2-M4) (only period 2008-2011)
cvdm_full_df <- cvdm_data
cvdm_restricted_df <- cvdm_data[cvdm_data$year>2007,]

# creation of interaction terms (Model M4)
# full overlap admin unit
cvdm_restricted_df$iterm1_pw <- cvdm_restricted_df$DIDP_1000pw*cvdm_restricted_df$idp_ra
cvdm_restricted_df$iterm2_pw <- cvdm_restricted_df$floodpw*cvdm_restricted_df$idp_ra
cvdm_restricted_df$iterm1_pw_1tl <- cvdm_restricted_df$DIDP_1000pw_1tl*cvdm_restricted_df$idp_ra
cvdm_restricted_df$iterm2_pw_1tl <- cvdm_restricted_df$floodpw_1tl*cvdm_restricted_df$idp_ra
# partial overlap admin unit
cvdm_restricted_df$iterm1_npw <- cvdm_restricted_df$DIDP_1000npw*cvdm_restricted_df$idp_ra
cvdm_restricted_df$iterm2_npw <- cvdm_restricted_df$floodnpw*cvdm_restricted_df$idp_ra
cvdm_restricted_df$iterm1_npw_1tl <- cvdm_restricted_df$DIDP_1000npw_1tl*cvdm_restricted_df$idp_ra
cvdm_restricted_df$iterm2_npw_1tl <- cvdm_restricted_df$floodnpw_1tl*cvdm_restricted_df$idp_ra
# near admin unit (with 25 km)
cvdm_restricted_df$iterm1_near <- cvdm_restricted_df$DIDP_1000near*cvdm_restricted_df$idp_ra
cvdm_restricted_df$iterm2_near <- cvdm_restricted_df$floodnear*cvdm_restricted_df$idp_ra
cvdm_restricted_df$iterm1_near_1tl <- cvdm_restricted_df$DIDP_1000near_1tl*cvdm_restricted_df$idp_ra
cvdm_restricted_df$iterm2_near_1tl <- cvdm_restricted_df$floodnear_1tl*cvdm_restricted_df$idp_ra



#### Statistical analyses

# base equation (include only control variables)
form_base <- formula( ~  agdppc_ln + iapop_ln + sc_inc_all_1tl  + sc_cl_inc_1tl + # CV admin 
    xpolity_1tl + anocracy_1tl + crgdppc_ln_1tl + cgdpg_1tl + cpop_ln_1tl + cpopg_1tl + incidencev412 + # CV country
    peaceyears_sc + peaceyears_sc_sq + peaceyears_sc_cb # peaceyears variables
    )


## Model 1
model1 <- glm(update(form_base,
    sc_inc_all ~ DIDP_1000pw + floodpw + # full overlap (t0)
    DIDP_1000npw + floodnpw + # part. overlap (t0)
    DIDP_1000near + floodnear + # near (t0)
    DIDP_1000pw_1tl + floodpw_1tl + # full overlap (t-1)
    DIDP_1000npw_1tl + floodnpw_1tl + # part. overlap (t-1)
    DIDP_1000near_1tl + floodnear_1tl + # near (t-1)
    flood_pexp + flood_pexp_sq + # count past events (all events)
    .),
  family = binomial(link = "logit"),
  data = cvdm_full_df)
# display results
coeftest(model1,vcovHC(model1,"HC0"))
# number of countries incuded
length(unique(expand.model.frame(model1, extras = "cow_code", na.expand = T)$cow_code))
# temporal horizon
length(unique(expand.model.frame(model1, extras = "year", na.expand = T)$year))
# Number of observations
nobs(model1)
# Loglik
logLik(model1)
# aic
AIC(model1)


## Model 2
model2 <- glm(update(form_base,
    sc_inc_all ~ DIDP_1000pw + floodpw + # full overlap (t0)
    DIDP_1000npw + floodnpw + # part. overlap (t0)
    DIDP_1000near + floodnear + # near (t0)
    DIDP_1000pw_1tl + floodpw_1tl + # full overlap (t-1)
    DIDP_1000npw_1tl + floodnpw_1tl + # part. overlap (t-1)
    DIDP_1000near_1tl + floodnear_1tl + # near (t-1)
    flood_pexp + flood_pexp_sq + # count past events (all events)
    .),
  family = binomial(link = "logit"),
  data = cvdm_restricted_df)
# display results
coeftest(model2,vcovHC(model2,"HC0"))
# number of countries incuded
length(unique(expand.model.frame(model2, extras = "cow_code", na.expand = T)$cow_code))
# temporal horizon
length(unique(expand.model.frame(model2, extras = "year", na.expand = T)$year))
# Number of observations
nobs(model2)
# Loglik
logLik(model2)
# aic
AIC(model2)


## Model3
model3 <- glm(update(form_base,
    sc_inc_all ~ idp_ra + # conflict IDPs
    flood_pexp + flood_pexp_sq + # count past events (all events)
    .),
  family = binomial(link = "logit"),
  data = cvdm_restricted_df)
# display results
coeftest(model3,vcovHC(model3,"HC0"))
# number of countries incuded
length(unique(expand.model.frame(model3, extras = "cow_code", na.expand = T)$cow_code))
# temporal horizon
length(unique(expand.model.frame(model3, extras = "year", na.expand = T)$year))
# Number of observations
nobs(model3)
# Loglik
logLik(model3)
# aic
AIC(model3)


## Model 4
model4 <- bayesglm(update(form_base,
    sc_inc_all ~ DIDP_1000pw + iterm1_pw + floodpw + iterm2_pw + #full overlap (t0) 
    DIDP_1000npw + iterm1_npw + floodnpw + iterm2_npw + #part. overlap (t0)
    DIDP_1000near + iterm1_near + floodnear + iterm2_near + #near (t0)
    DIDP_1000pw_1tl + iterm1_pw_1tl + floodpw_1tl + iterm2_pw_1tl +#full overlap (t-1)
    DIDP_1000npw_1tl + iterm1_npw_1tl + floodnpw_1tl + iterm2_npw_1tl + #part. overlap (t-1)
    DIDP_1000near_1tl + iterm1_near_1tl + floodnear_1tl + iterm2_near_1tl + #near (t-1)
    idp_ra + # conflict IDPs
    flood_pexp + flood_pexp_sq + # count past events (all events)
    .),
  family = binomial(link = "logit"),
  data = cvdm_restricted_df)
# display results
coeftest(model4,vcovHC(model4,"HC0"))
# number of countries incuded
length(unique(expand.model.frame(model4, extras = "cow_code", na.expand = T)$cow_code))
# temporal horizon
length(unique(expand.model.frame(model4, extras = "year", na.expand = T)$year))
# Number of observations
nobs(model4)
# Loglik
logLik(model4)
# aic
AIC(model4)




##### Figures

## Figure 1
xrange <- range(2008,max(idmc_df$year))
yrange <- range(0,50E6) 
plot(xrange,yrange,cex.lab=1.5,cex.axis=1.25,xlab="", type="n",
     ylab="Millions displaced persons",axes = F,main="")
axis(side = 2,labels = c(0:5),at = c(0,1E7,2E7,3E7,4E7,5E7))
axis(side = 1,labels = c(2008:2017),at = c(2008:2017))
lines(idmc_df$year, idmc_df$disaster_new_displacements, type="b", lwd=1.5,lty=1,pch=2)
lines(idmc_df$year[idmc_df$year>=2008], idmc_df$conflict_new_displacements[idmc_df$year>=2008], type="b", lwd=1.5,lty=2,pch=1) 
legend(2014,5E7,c("Disaster displacement","Conflict displacement"),
       cex=1.25, pch=c(1,2,4),lwd=c(1.5,1.5,1.5),lty=c(1,2,3),bty="n")
rm(xrange,yrange)


## Figure 2
cvdm_data_agg <- plyr::ddply(cvdm_full_df,c("year"),"summarize",
  sc_inc_agg = length(sc_inc_all[which(sc_inc_all==1)]),
  DIDP_1000pw_agg = length(DIDP_1000pw[which(DIDP_1000pw==1)]),
  DIDP_1000npw_agg = length(DIDP_1000npw[which(DIDP_1000npw==1)]),
  DIDP_1000near_agg = length(DIDP_1000near[which(DIDP_1000near==1)])
)
# plot
xrange <- range(cvdm_data_agg$year)
yrange <- range(cvdm_data_agg$DIDP_1000npw_agg) 
plot(range(cvdm_data_agg$year), range(cvdm_data_agg$DIDP_1000npw_agg),cex.lab=1.5,cex.axis=1.25,xlab="", type="n",
   ylab="Number of  administrative zones affected by displacement")
lines(cvdm_data_agg$year, cvdm_data_agg$DIDP_1000pw_agg, type="b", lwd=1.5,lty=1,pch=1) 
lines(cvdm_data_agg$year, cvdm_data_agg$DIDP_1000npw_agg, type="b", lwd=1.5,lty=2,pch=2)
lines(cvdm_data_agg$year, cvdm_data_agg$DIDP_1000near_agg, type="b", lwd=1.5,lty=3,pch=4)
legend(1990,220,c("Completely affected administrative units","Partially affected administrative units","Proximate administrative units"),
  cex=1.25, pch=c(1,2,4),lwd=c(1.5,1.5,1.5),lty=c(1,2,3),bty="n")
rm(xrange,yrange)


## Figure 3

# plot window limit
yat=rev(c(seq(0,2/3,by = 1/3),seq(0,2/3,by = 1/3)+(2/3+.5),seq(0,2/3,by = 1/3)+(4/3+1)))
limx=c(-.3,.3)
limx_negbin=c(-1,1)

# obtain quantity of interest
apd_matrix_m1_0tl<-qi_linear(model1,lag=F)
apd_matrix_m1_1tl<-qi_linear(model1,lag=T)
# graph average predicted difference
par(ps=15,las=1,mar=c(0,0,0,2),oma=c(4, 12, 3, 0),mfcol=c(1,2))
# plot 1
plot.new();plot.window(xlim = limx,ylim=range(yat));abline(v=0,col="grey")
plotCI(apd_matrix_m1_1tl[,1],yat,
  li=apd_matrix_m1_1tl[,3],ui=apd_matrix_m1_1tl[,6],err='x',
  gap=0,xlab="",bty="n",
  ylab="",yaxt="n",pch=16,add=T);axis(1, cex.axis=.6)
axis(side=2, at=yat, labels=c(
    "Fully affected, Floods","Fully affected, Displ.",expression(italic("Difference")),
    "Partially affected, Floods","Partially affected, Displ.",expression(italic("Difference")),
    "Proximate, Floods","Proximate, Displ.",expression(italic("Difference"))
    ),cex.axis=.6,tick=FALSE)
mtext(expression(paste(Delta," P (Social conflict)")), 1, 2.5, cex=.6, outer=FALSE)
mtext("Lagged response (T-1)", 3, 1, cex=.6,at=0, outer=FALSE)
# plot 2
plot.new();plot.window(xlim = limx,ylim=range(yat));abline(v=0,col="grey")
plotCI(apd_matrix_m1_0tl[,1],yat,
  li=apd_matrix_m1_0tl[,3],ui=apd_matrix_m1_0tl[,6],err='x',
  gap=0,xlab="Change in P (Social conflict)",bty="n",
  ylab="",yaxt="n",xlim = limx,ylim=range(yat),pch=16,add=T);axis(1,cex.axis=.6)
mtext(expression(paste(Delta," P (Social conflict)")), 1, 2.5, cex=.6, outer=FALSE)
mtext("Immediate response (T0)", 3, 1,at=0, cex=.6, outer=FALSE)


## Figure 4

# obtain quantity of interest
apd_matrix_m2_0tl<-qi_linear(model2,lag=F)
apd_matrix_m2_1tl<-qi_linear(model2,lag=T)

# graph average predicted difference
par(ps=15,las=1,mar=c(0,0,0,2),oma=c(4, 12, 3, 0),mfcol=c(1,2))
# plot 1
plot.new();plot.window(xlim = limx,ylim=range(yat));abline(v=0,col="grey")
plotCI(apd_matrix_m2_1tl[,1],yat,
  li=apd_matrix_m2_1tl[,3],ui=apd_matrix_m2_1tl[,6],err='x',
  gap=0,xlab="",bty="n",
  ylab="",yaxt="n",pch=16,add=T);axis(1,cex.axis=.6)
axis(side=2, at=yat, labels=c(
    "Fully affected, Floods","Fully affected, Displ.",expression(italic("Difference")),
    "Partially affected, Floods","Partially affected, Displ.",expression(italic("Difference")),
    "Proximate, Floods","Proximate, Displ.",expression(italic("Difference"))
    ),cex.axis=.6,tick=FALSE)
mtext(expression(paste(Delta," P (Social conflict)")), 1, 2.5, cex=.6, outer=FALSE)
mtext("Lagged response (T-1)", 3, 1, cex=.6,at=0, outer=FALSE)
# plot 2
plot.new();plot.window(xlim = limx,ylim=range(yat));abline(v=0,col="grey")
plotCI(apd_matrix_m2_0tl[,1],yat,
  li=apd_matrix_m2_0tl[,3],ui=apd_matrix_m2_0tl[,6],err='x',
  gap=0,xlab="",bty="n",
  ylab="",yaxt="n",pch=16,add=T);axis(1,cex.axis=.6)
mtext(expression(paste(Delta," P (Social conflict)")), 1, 2.5, cex=.6, outer=FALSE)
mtext("Immediate response (T0)", 3, 1, cex=.6,at=0, outer=FALSE)


##  Figure 5

# obtain quantity of interest
apd_matrix_m4_0tl<-qi_inter(model4,lag=F)
apd_matrix_m4_1tl<-qi_inter(model4,lag=T)

# graph average predicted difference
par(ps=15,las=1,mar=c(0,0,0,2),oma=c(4, 12, 3, 0),mfcol=c(1,2))
# plot 1
plot.new();plot.window(xlim = limx,ylim=range(yat));abline(v=0,col="grey")
plotCI(apd_matrix_m4_1tl[,1],yat,
  li=apd_matrix_m4_1tl[,3],ui=apd_matrix_m4_1tl[,6],err='x',
  gap=0,xlab="",bty="n",
  ylab="",yaxt="n",pch=16,add=T);axis(1,cex.axis=.6)
axis(side=2, at=yat, labels=c(
    "Fully affected, Floods","Fully affected, Displ.",expression(italic("Difference")),
    "Partially affected, Floods","Partially affected, Displ.",expression(italic("Difference")),
    "Proximate, Floods","Proximate, Displ.",expression(italic("Difference"))
    ),cex.axis=.6,tick=FALSE)
mtext(expression(paste(Delta," P (Social conflict)")), 1, 2.5, cex=.6, outer=FALSE)
mtext("Lagged response (T-1)", 3, 1, cex=.6,at=0, outer=FALSE)
# plot 2
plot.new();plot.window(xlim = limx,ylim=range(yat));abline(v=0,col="grey")
plotCI(apd_matrix_m4_0tl[,1],yat,
  li=apd_matrix_m4_0tl[,3],ui=apd_matrix_m4_0tl[,6],err='x',
  gap=0,xlab="",bty="n",
  ylab="",yaxt="n",pch=16,add=T);axis(1,cex.axis=.6)
mtext(expression(paste(Delta," P (Social conflict)")), 1, 2.5, cex=.6, outer=FALSE)
mtext("Immediate response (T0)", 3, 1, cex=.6,at=0, outer=FALSE)




#### Other statistics cited in the text

## Fn 22: Confidence interval: effects of conflict-induced displacement on social conflict (Model 3)

# obtain average predictive differences
model3.data <- model.matrix(model3)
colnames(model3.data)
model3.data[,which(colnames(model3.data)=="idp_ra")]<-0
model3.data_cf <- model3.data
model3.data_cf[,which(colnames(model3.data_cf)=="idp_ra")]<-1
set.seed(1234)
s.model3nd <- mvrnorm(1000,coef(model3),vcovHC(model3))
p.model3 <- array(0,dim=c(1000,2))
for (i in 1:1000){
  p.model3[i,1] <- mean(1/(1+exp(-(s.model3nd[i,]%*%t(model3.data)))))
  p.model3[i,2] <- mean(1/(1+exp(-(s.model3nd[i,]%*%t(model3.data_cf)))))
}

# summarize results
mean(p.model3[,2]-p.model3[,1])
median(p.model3[,2]-p.model3[,1])
quantile(p.model3[,2]-p.model3[,1], c(.025,.975))
quantile(p.model3[,2]-p.model3[,1], c(.05,.95))
